package xiaoa.java.spider;

import java.io.File;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import org.apache.commons.io.FileUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;


/**
 * Celebrity-name scraping spider.
 *
 * <p>Downloads one listing page, collects the text of every link found inside
 * elements with the CSS class {@code mx_list_s}, and writes the names to two
 * text files: names from the element whose id is {@code mx_1} go to the "hot"
 * file, names from every other matching element go to the "normal" file.
 *
 * @author xiaoa
 * @date 2017-12-09 08:04:28
 * @version V1.0
 *
 */
public class MXSpider {

	/** Page that is downloaded and scanned for names. */
	private static final String SOURCE_URL = "http://g.manmankan.com/dy2013/mingxing/neidi/nanmingxing.shtml#";

	/** CSS selector of the elements that contain the name links. */
	private static final String LIST_SELECTOR = ".mx_list_s";

	/** Value of the {@code id} attribute that marks the "hot" list element. */
	private static final String HOT_LIST_ID = "mx_1";

	/**
	 * Charset used for both output files. It is stated explicitly because the
	 * names are non-ASCII text and the platform default charset differs between
	 * machines.
	 */
	private static final String OUTPUT_CHARSET = "UTF-8";

	/**
	 * Fetches the listing page, splits the link texts into the hot and normal
	 * lists, and writes each list to its own file (one entry per line).
	 *
	 * @param args command-line arguments; not used
	 * @throws Exception if the page cannot be fetched or parsed, or if either
	 *                   output file cannot be written
	 */
	public static void main(String[] args) throws Exception {

		// Allow up to five minutes for the page download.
		int timeoutMillis = (int) TimeUnit.MINUTES.toMillis(5);
		Document doc = Jsoup.parse(new URL(SOURCE_URL), timeoutMillis);

		Elements sections = doc.select(LIST_SELECTOR);

		List<String> hotList = collectNames(sections, true);
		List<String> normalList = collectNames(sections, false);

		FileUtils.writeLines(new File("E:\\数据\\明星\\hotList.txt"), OUTPUT_CHARSET, hotList);
		FileUtils.writeLines(new File("E:\\数据\\明星\\normalList.txt"), OUTPUT_CHARSET, normalList);

		System.out.println("完成");
	}

	/**
	 * Collects the text of every {@code a} element found inside the selected
	 * sections, keeping document order.
	 *
	 * @param sections elements matched by {@link #LIST_SELECTOR}
	 * @param hot      {@code true} to take only the section whose id equals
	 *                 {@link #HOT_LIST_ID}; {@code false} to take all other sections
	 * @return the link texts of the chosen sections, in encounter order
	 */
	private static List<String> collectNames(Elements sections, boolean hot) {
		return sections.stream()
				// Constant-first equals stays safe even if the id is missing.
				.filter(section -> HOT_LIST_ID.equals(section.attr("id")) == hot)
				.flatMap(section -> section.select("a").stream())
				.map(link -> link.text())
				.collect(Collectors.toList());
	}

}
